######################################################
#PA 2018 Democratic Primary Model.
# When Women Run, Voters Will Follow (Sometimes): 
##Examining the Mobilizing Effect of Female Candidates in the 2014 and 2018 Midterm Elections 
#By Safarpour, Wyckoff Gaynor, Rouse, and Swers #
######################################################

# Start from an empty workspace: remove every object that ls() reports in the
# current environment before anything else runs.
rm(list = ls())

#Packages.
library(mvtnorm) 
library(stargazer)
library(stats)
library(arm)
library(plyr)
library(Hmisc)
library(dplyr)

#Setwd.
# The replication folder on the original author's machine. Only switch to it
# when it actually exists; otherwise keep the current working directory so the
# script can be run from a copy of the replication folder on another machine
# instead of aborting in setwd().
replication_dir <- "/Users/ACS/Dropbox/When Women Run/Revision_PoliticalBehavior/R&R Part 2/Publication Docs/Replication Data and Code/"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
}

#Data.
# Fail with an explicit message when the data file is not in the working
# directory, rather than with read.csv()'s generic connection error.
data_file <- "PA2018PrimaryDataFinal.csv"
if (!file.exists(data_file)) {
  stop(
    "Cannot find '", data_file, "' in '", getwd(),
    "'. Run this script from the replication folder.",
    call. = FALSE
  )
}
PA <- read.csv(data_file, header = TRUE)

#Model predicting democratic turnout in 2018 PA Primary.
# Logistic regression of voted2018primary on womaninprimarydem, female2, four
# generation indicators (GenerationZ, Millennials, GenerationX,
# SilentGeneration), voted2016r and Proptrumpvote2016, with womaninprimarydem
# interacted with female2 and with each generation indicator. No Baby Boomer
# indicator enters the formula; the table note passed to stargazer() below
# names Baby Boomers as the baseline age category.
#
# In an R formula `a*b` already expands to a + b + a:b, so the main effects
# that are also listed on their own are redundant but harmless.
#
# R puts all main effects (in order of appearance) ahead of the interaction
# terms in the coefficient vector, regardless of how they are interleaved
# here. The covariate.labels given to stargazer() and the positional
# sim_coefs[i, k] indexing later in this script both rely on that order, so
# changing the formula requires updating them as well.
logitbasic<- glm(voted2018primary~ womaninprimarydem+ 
                   female2+ womaninprimarydem*female2+ 
                   GenerationZ+ womaninprimarydem*GenerationZ+
                   Millennials+ womaninprimarydem*Millennials+
                   GenerationX+ womaninprimarydem*GenerationX+
                   SilentGeneration+ womaninprimarydem*SilentGeneration+
                   voted2016r+Proptrumpvote2016,
                 data=PA, family=binomial(link="logit"))
# Print the coefficient table and fit statistics.
summary(logitbasic)

# Write the fitted model to an HTML regression table. The covariate labels are
# listed in the order of the model's coefficients: the eight main effects
# first, then the five interactions with the female-candidate indicator.
stargazer(
  logitbasic,
  type = "html",
  out = "PA2018PrimaryMainModelTableFormatted.html",
  title = "Effects of candidate gender, generation, gender, prior voting, and county Trump vote share on 2018 Democratic Primary Turnout in PA",
  dep.var.labels = "Voted in PA 2018 Democratic Primary",
  column.labels = "",
  covariate.labels = c(
    "Female Candidate",
    "Female",
    "Generation Z",
    "Millennials",
    "Generation X",
    "Silent Generation",
    "Voted in 2016 General Election",
    "County Trump Vote 2016",
    "Female Candidate*Female",
    "Female Candidate*Generation Z",
    "Female Candidate*Millennials",
    "Female Candidate*Generation X",
    "Female Candidate*Silent Generation"
  ),
  notes = "Results from logistic regression. Standard errors in parentheses. Baseline age category: Baby Boomers. Model excludes non-Democratic registrants and all those with registration date after deadline to vote in primary.",
  notes.append = TRUE,
  notes.align = "l",
  digits = 3,
  single.row = FALSE
)

#Compute Predicted Probabilities.
# Regression sample: the rows of PA with no missing value on the outcome or on
# any predictor that enters the model.
sample <- PA[
  complete.cases(PA[, c(
    "voted2018primary", "womaninprimarydem", "SilentGeneration",
    "Millennials", "GenerationZ", "GenerationX", "female2",
    "voted2016r", "Proptrumpvote2016"
  )]),
]
# The full data set is not used again below.
rm(PA)

# Fitted probabilities on the response scale for the observations the model
# was estimated on (no newdata is supplied).
preds <- predict(logitbasic, type = "response")
# Print with four significant digits from here on.
options(digits = 4)
summary(preds)

#Overall effect for woman in primary.
# Simulation setup: draw n_draws coefficient vectors from a multivariate
# normal centred on the estimated coefficients, using the estimated
# variance-covariance matrix of the model.
n_draws <- 1000
set.seed(1714)

# Everything below addresses the columns of sim_coefs by position
# (1 = intercept, 2 = womaninprimarydem, 3-9 = remaining main effects,
# 10-14 = interactions with womaninprimarydem). Stop here if the model's terms
# are not in that order or do not map to exactly one coefficient each, instead
# of silently simulating with mismatched coefficients.
expected_terms <- c(
  "womaninprimarydem", "female2", "GenerationZ", "Millennials",
  "GenerationX", "SilentGeneration", "voted2016r", "Proptrumpvote2016",
  "womaninprimarydem:female2", "womaninprimarydem:GenerationZ",
  "womaninprimarydem:Millennials", "womaninprimarydem:GenerationX",
  "womaninprimarydem:SilentGeneration"
)
stopifnot(
  identical(attr(terms(logitbasic), "term.labels"), expected_terms),
  length(coef(logitbasic)) == length(expected_terms) + 1L
)

# Named beta_hat / beta_vcov rather than coef / vcov so the stats generics of
# the same name are not masked in the workspace.
beta_hat <- coef(logitbasic)
beta_vcov <- vcov(logitbasic)
sim_coefs <- rmvnorm(n_draws, beta_hat, beta_vcov) #Specify the 1,000 simulated coefficients.

# Check they are close to original: estimates on top, column means of the
# simulated draws underneath.
rbind(estimate = beta_hat, simulated_mean = colMeans(sim_coefs))

# Average predicted turnout probability for every simulated coefficient draw.
#
# draws     : matrix with one simulated coefficient vector per row; columns
#             are used by position (1 intercept, 2 womaninprimarydem,
#             3 female2, 4 GenerationZ, 5 Millennials, 6 GenerationX,
#             7 SilentGeneration, 8 voted2016r, 9 Proptrumpvote2016,
#             10-14 womaninprimarydem interacted with columns 3-7).
# voters    : data frame holding the predictor columns named below.
# woman_ran : value (0 or 1) assigned to womaninprimarydem for every row.
#
# Returns a numeric vector with one element per row of draws: the mean over
# the rows of voters of the inverse-logit of the linear predictor.
# Defined inside this section so the section runs on its own.
avg_turnout_prob <- function(draws, voters, woman_ran) {
  vapply(seq_len(nrow(draws)), function(i) {
    b <- unname(draws[i, ])
    mean(invlogit(
      b[1] + b[2] * woman_ran +
        b[3] * voters$female2 +
        b[4] * voters$GenerationZ +
        b[5] * voters$Millennials +
        b[6] * voters$GenerationX +
        b[7] * voters$SilentGeneration +
        b[8] * voters$voted2016r +
        b[9] * voters$Proptrumpvote2016 +
        b[10] * voters$female2 * woman_ran +
        b[11] * voters$GenerationZ * woman_ran +
        b[12] * voters$Millennials * woman_ran +
        b[13] * voters$GenerationX * woman_ran +
        b[14] * voters$SilentGeneration * woman_ran
    ))
  }, numeric(1))
}

# Everyone in the regression sample set to "no woman in the primary".
ppwomen0.s <- avg_turnout_prob(sim_coefs, sample, 0)
mean(ppwomen0.s)

# Everyone in the regression sample set to "woman in the primary".
ppwomen1.s <- avg_turnout_prob(sim_coefs, sample, 1)
mean(ppwomen1.s)

# Draw-by-draw difference between the two scenarios.
effect.01 <- ppwomen1.s - ppwomen0.s
summary(effect.01) # (0-1 women candidate effect)

# CIs and put results together in matrix
elements <- list(ppwomen0.s, ppwomen1.s, effect.01)
lapply(elements, summary) # See summary for each element of the list
# Get quantiles for each: one row per element, columns 2.5% and 97.5%.
ci <- do.call("rbind", lapply(elements, quantile, c(.025, .975)))

# vapply() returns a plain numeric vector of means. With lapply() the middle
# column was a list, which made cbind() build a list-matrix rather than a
# numeric matrix.
results <- cbind(ci[, 1], vapply(elements, mean, numeric(1)), ci[, 2])
colnames(results) <- c("2.5", "Mean", "97.5")
rownames(results) <- c("No Women Ran", "Female Candidate", "Effect No Women-Female Candidate")
results

##Get Predictions by Generation.
# Average predicted turnout probability for every simulated coefficient draw.
#
# draws     : matrix with one simulated coefficient vector per row; columns
#             are used by position (1 intercept, 2 womaninprimarydem,
#             3 female2, 4 GenerationZ, 5 Millennials, 6 GenerationX,
#             7 SilentGeneration, 8 voted2016r, 9 Proptrumpvote2016,
#             10-14 womaninprimarydem interacted with columns 3-7).
# voters    : data frame holding the predictor columns named below.
# woman_ran : value (0 or 1) assigned to womaninprimarydem for every row.
#
# Returns a numeric vector with one element per row of draws: the mean over
# the rows of voters of the inverse-logit of the linear predictor.
# Defined inside this section so the section runs on its own.
avg_turnout_prob <- function(draws, voters, woman_ran) {
  vapply(seq_len(nrow(draws)), function(i) {
    b <- unname(draws[i, ])
    mean(invlogit(
      b[1] + b[2] * woman_ran +
        b[3] * voters$female2 +
        b[4] * voters$GenerationZ +
        b[5] * voters$Millennials +
        b[6] * voters$GenerationX +
        b[7] * voters$SilentGeneration +
        b[8] * voters$voted2016r +
        b[9] * voters$Proptrumpvote2016 +
        b[10] * voters$female2 * woman_ran +
        b[11] * voters$GenerationZ * woman_ran +
        b[12] * voters$Millennials * woman_ran +
        b[13] * voters$GenerationX * woman_ran +
        b[14] * voters$SilentGeneration * woman_ran
    ))
  }, numeric(1))
}

# Split the regression sample by generation indicator.
SG <- subset(sample, SilentGeneration == 1)
BB <- subset(sample, BabyBoomers == 1)
GX <- subset(sample, GenerationX == 1)
M <- subset(sample, Millennials == 1)
GZ <- subset(sample, GenerationZ == 1)

# For each generation: w0.* sets womaninprimarydem to 0 for every member,
# w1.* sets it to 1; each holds one average probability per simulated draw.
w0.SG <- avg_turnout_prob(sim_coefs, SG, 0)
w1.SG <- avg_turnout_prob(sim_coefs, SG, 1)
w0.BB <- avg_turnout_prob(sim_coefs, BB, 0)
w1.BB <- avg_turnout_prob(sim_coefs, BB, 1)
w0.GX <- avg_turnout_prob(sim_coefs, GX, 0)
w1.GX <- avg_turnout_prob(sim_coefs, GX, 1)
w0.M <- avg_turnout_prob(sim_coefs, M, 0)
w1.M <- avg_turnout_prob(sim_coefs, M, 1)
w0.GZ <- avg_turnout_prob(sim_coefs, GZ, 0)
w1.GZ <- avg_turnout_prob(sim_coefs, GZ, 1)

# Draw-by-draw difference between the two scenarios within each generation.
effect.SG <- w1.SG - w0.SG
effect.BB <- w1.BB - w0.BB
effect.GX <- w1.GX - w0.GX
effect.M <- w1.M - w0.M
effect.GZ <- w1.GZ - w0.GZ

#drop what we no longer need.
rm(SG, BB, GX, M, GZ)

#Effect by gender subgroup.
# Average predicted turnout probability for every simulated coefficient draw.
#
# draws     : matrix with one simulated coefficient vector per row; columns
#             are used by position (1 intercept, 2 womaninprimarydem,
#             3 female2, 4 GenerationZ, 5 Millennials, 6 GenerationX,
#             7 SilentGeneration, 8 voted2016r, 9 Proptrumpvote2016,
#             10-14 womaninprimarydem interacted with columns 3-7).
# voters    : data frame holding the predictor columns named below.
# woman_ran : value (0 or 1) assigned to womaninprimarydem for every row.
#
# Returns a numeric vector with one element per row of draws: the mean over
# the rows of voters of the inverse-logit of the linear predictor.
# Defined inside this section so the section runs on its own.
avg_turnout_prob <- function(draws, voters, woman_ran) {
  vapply(seq_len(nrow(draws)), function(i) {
    b <- unname(draws[i, ])
    mean(invlogit(
      b[1] + b[2] * woman_ran +
        b[3] * voters$female2 +
        b[4] * voters$GenerationZ +
        b[5] * voters$Millennials +
        b[6] * voters$GenerationX +
        b[7] * voters$SilentGeneration +
        b[8] * voters$voted2016r +
        b[9] * voters$Proptrumpvote2016 +
        b[10] * voters$female2 * woman_ran +
        b[11] * voters$GenerationZ * woman_ran +
        b[12] * voters$Millennials * woman_ran +
        b[13] * voters$GenerationX * woman_ran +
        b[14] * voters$SilentGeneration * woman_ran
    ))
  }, numeric(1))
}

# Split the regression sample on the female2 indicator.
Men <- subset(sample, female2 == 0)
Women <- subset(sample, female2 == 1)

# w0.* sets womaninprimarydem to 0 for every member of the subgroup, w1.* sets
# it to 1; each holds one average probability per simulated draw.
w0.Men <- avg_turnout_prob(sim_coefs, Men, 0)
w1.Men <- avg_turnout_prob(sim_coefs, Men, 1)
w0.Women <- avg_turnout_prob(sim_coefs, Women, 0)
w1.Women <- avg_turnout_prob(sim_coefs, Women, 1)

# Draw-by-draw difference between the two scenarios within each subgroup.
effect.Menm1 <- w1.Men - w0.Men
effect.Womenm1 <- w1.Women - w0.Women


#Store the Effects.
# One vector of simulated effects per output row, in the same order as the
# subgroup labels below.
effect_draws <- list(
  effect.01, effect.Womenm1, effect.Menm1,
  effect.GZ, effect.M, effect.GX, effect.BB, effect.SG
)
subgroup_labels <- c(
  "Overall", "Women", "Men", "Gen. Z", "Millennials",
  "Gen. X", "Baby\nBoomers", "Silent Gen."
)

Election <- rep("2018 Primary (D)", length(effect_draws))
# Point estimate: mean of the simulated effects. Interval: 2.5th and 97.5th
# percentiles of the same draws.
Effect <- vapply(effect_draws, mean, numeric(1))
LowerCI <- unlist(lapply(effect_draws, quantile, probs = 0.025))
UpperCI <- unlist(lapply(effect_draws, quantile, probs = 0.975))

PAprimary <- data.frame(Effect, UpperCI, LowerCI, Election)
# Ordered factor so the subgroups keep this order rather than sorting
# alphabetically.
PAprimary$`Registrant Subgroup` <- factor(
  seq_along(subgroup_labels),
  levels = seq_along(subgroup_labels),
  labels = subgroup_labels,
  ordered = TRUE
)
# The two helper objects above were only needed to build the table.
rm(effect_draws, subgroup_labels)

#write effects to data frame.
write.csv(PAprimary, "PA2018PrimaryEffects.csv", row.names = FALSE)

